%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the tab-separated interface review sheet. The first two columns are
# used as a two-level row index, and blanks following a delimiter are skipped.
base = pd.read_csv(
    "../data/InterfaceReview-May2019.tsv",
    sep="\t",
    index_col=[0, 1],
    skipinitialspace=True,
)
base.head()
# Spot checks on single rows, addressed by both index levels.
base.loc[('apis', 'IIIF Image API')]
base.loc[('information on digitization', 'OCR confidence scores')].describe()
base.loc[('newspaper metadata', 'Place of publication')]
# counts will not work for categorical data
base.loc[('newspaper collection', 'Languages of the collections')]
def trim_all_columns(df):
    """
    Trim whitespace from ends of each value across all series in dataframe

    Only string cells are touched; every other value is passed through
    unchanged. The index is not modified and the input frame is left intact.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells should be stripped.

    Returns
    -------
    pandas.DataFrame
        New frame of the same shape with string cells stripped.
    """
    def _strip_if_str(value):
        # isinstance (rather than an exact type check) also covers str
        # subclasses such as numpy.str_.
        return value.strip() if isinstance(value, str) else value

    # DataFrame.applymap was deprecated in pandas 2.1 in favour of
    # DataFrame.map. Look the method up on the class so that a column that
    # happens to be called "map" cannot shadow it on older pandas versions.
    if hasattr(type(df), "map"):
        return df.map(_strip_if_str)
    return df.applymap(_strip_if_str)
# Clean the sheet in one pass:
#   1. strip stray whitespace from every string cell,
#   2. drop the first-level groups that are excluded from the analysis,
#   3. drop the second-level rows that are excluded from the analysis,
#   4. recode the yes-like answers to 1 and the no/unknown-like answers to 0.
base = (
    trim_all_columns(base)
    .drop(['interface', 'newspaper collection'], level=0)
    .drop(
        [
            'Other',
            'Languages of the collections',
            'Download options (file formats)',
        ],
        level=1,
    )
    .replace(
        to_replace=[
            'y', 'y?', 'y (annotations)', 'y (requires user account - free)',
            'n', '?', 'u', 'n?',
        ],
        value=[
            1, 1, 1, 1,
            0, 0, 0, 0,
        ],
    )
)
base.head()
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, RegularPolygon
from matplotlib.path import Path
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
def radar_factory(num_vars, frame='circle'):
    """Create a radar chart with `num_vars` axes.

    This function creates a RadarAxes projection and registers it under the
    projection name ``'radar'``. Calling it again registers a new class under
    the same name.

    Parameters
    ----------
    num_vars : int
        Number of variables for radar chart.
    frame : {'circle' | 'polygon'}
        Shape of frame surrounding axes.

    Returns
    -------
    numpy.ndarray
        `num_vars` evenly spaced angles in radians, starting at 0 and
        excluding 2*pi.

    Raises
    ------
    ValueError
        Raised by the axes' ``_gen_axes_patch`` / ``_gen_axes_spines`` hooks
        (not by this function itself) when `frame` is neither ``'circle'``
        nor ``'polygon'``.
    """
    # calculate evenly-spaced axis angles
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)

    class RadarAxes(PolarAxes):

        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # rotate plot such that the first axis is at the top
            self.set_theta_zero_location('N')

        def fill(self, *args, closed=True, **kwargs):
            """Override fill so that line is closed by default"""
            return super().fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default"""
            lines = super().plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)
            # Hand the line artists back, as the parent plot() does, so
            # callers can keep styling them.
            return lines

        def _close_line(self, line):
            """Append the first point of `line` to its end if it is open."""
            x, y = line.get_data()
            # FIXME: markers at x[0], y[0] get doubled-up
            # The length guard keeps an empty line from raising IndexError.
            if len(x) and x[0] != x[-1]:
                x = np.concatenate((x, [x[0]]))
                y = np.concatenate((y, [y[0]]))
                line.set_data(x, y)

        def set_varlabels(self, labels, fontsize):
            """Label each of the `num_vars` spokes with the given font size."""
            self.set_thetagrids(np.degrees(theta), labels, fontsize=fontsize)

        def _gen_axes_patch(self):
            # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5
            # in axes coordinates.
            if frame == 'circle':
                return Circle((0.5, 0.5), 0.5)
            elif frame == 'polygon':
                return RegularPolygon((0.5, 0.5), num_vars,
                                      radius=.5, edgecolor="k")
            else:
                raise ValueError("unknown value for 'frame': %s" % frame)

        def _gen_axes_spines(self):
            if frame == 'circle':
                return super()._gen_axes_spines()
            elif frame == 'polygon':
                # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'.
                spine = Spine(axes=self,
                              spine_type='circle',
                              path=Path.unit_regular_polygon(num_vars))
                # unit_regular_polygon gives a polygon of radius 1 centered at
                # (0, 0) but we want a polygon of radius 0.5 centered at (0.5,
                # 0.5) in axes coordinates.
                spine.set_transform(Affine2D().scale(.5).translate(.5, .5)
                                    + self.transAxes)
                return {'polar': spine}
            else:
                raise ValueError("unknown value for 'frame': %s" % frame)

    register_projection(RadarAxes)
    return theta
def build_single_radar(labels, values, title, grid, figure_title):
    """Draw one radar chart and either show it or save it as a PDF.

    Parameters
    ----------
    labels : sequence
        One label per spoke; its length sets the number of spokes.
    values : iterable of sequences
        Data series to overlay. Each series is plotted against the spoke
        angles and filled with a translucent colour.
    title : str
        Title placed above the axes.
    grid : sequence of numbers
        Radii at which to draw the radial grid; each is labelled with its
        own string form.
    figure_title : str or None
        When None the figure is shown with ``plt.show()``. Otherwise it is
        written to ``../charts/<figure_title>.pdf``.

    Returns
    -------
    None
    """
    N = len(labels)
    theta = radar_factory(N, frame='polygon')

    fig, ax = plt.subplots(figsize=(10, 10),
                           subplot_kw=dict(projection='radar'))
    fig.subplots_adjust(top=0.85, bottom=0.05)

    ax.set_rgrids(grid, labels=[str(i) for i in grid], size='large')
    ax.set_title(title, position=(0.5, 1.1), ha='center')

    for series in values:
        ax.plot(theta, series)
        ax.fill(theta, series, alpha=0.25)
    ax.set_varlabels(labels, fontsize=12)

    if figure_title is None:
        plt.show()
    else:
        # `quality` is a JPEG-only savefig option that recent Matplotlib
        # releases no longer accept, so it is not passed for PDF output.
        plt.savefig(f'../charts/{figure_title}.pdf', format='pdf')
def build_multiple_radar(labels, values, titles, grid, figure_title):
    """Draw a 4 x 6 grid of radar charts, one per data series.

    Series, titles and axes are paired in order with ``zip``, so pairing
    stops at the shortest of the three; any surplus axes stay empty.

    Parameters
    ----------
    labels : sequence
        One label per spoke; its length sets the number of spokes.
    values : iterable of sequences
        One data series per chart.
    titles : iterable of str
        One title per chart, in the same order as `values`.
    grid : sequence of numbers
        Accepted for symmetry with `build_single_radar` but currently not
        applied: the radial axis is fixed to the range 0-30 and the radial
        grid lines are left at Matplotlib's defaults.
    figure_title : str or None
        When None the figure is shown with ``plt.show()``. Otherwise it is
        written to ``../charts/<figure_title>.pdf``.

    Returns
    -------
    None
    """
    N = len(labels)
    theta = radar_factory(N, frame='polygon')

    fig, axes = plt.subplots(figsize=(120, 80), nrows=4, ncols=6,
                             subplot_kw=dict(projection='radar'))
    fig.subplots_adjust(wspace=0.50, hspace=0.20, top=0.85, bottom=0.05)

    for ax, case_data, title in zip(axes.flatten(), values, titles):
        # Same radial range on every chart so they can be compared visually.
        ax.set_ylim(0, 30)
        ax.set_title(title, weight='bold', fontsize=42, position=(0.5, 1.1),
                     horizontalalignment='center', verticalalignment='center')
        ax.plot(theta, case_data)
        ax.fill(theta, case_data, alpha=0.25)
        ax.set_varlabels(labels, fontsize=32)

    if figure_title is None:
        plt.show()
    else:
        # `quality` is a JPEG-only savefig option that recent Matplotlib
        # releases no longer accept, so it is not passed for PDF output.
        plt.savefig(f'../charts/{figure_title}.pdf', format='pdf')
# Sum the 0/1 answers per first-level group (feature family), one column per
# interface. GroupBy.sum() has no `axis` parameter, so none is passed.
level_0 = base.groupby(level=0).sum()
# Fix the order in which the feature families appear around the radar.
level_0 = level_0.reindex(["newspaper metadata",
                           "apis",
                           "connectivity",
                           "information on digitization",
                           "enrichment",
                           "user interaction",
                           "viewer",
                           "result display",
                           "result filtering",
                           "result sorting",
                           "search",
                           "browsing"
                           ])
level_0.head()
# Total sum per row, i.e. per feature family (=> how good are all interfaces for a certain aspect):
level_0.loc[:, 'Total'] = level_0.sum(axis=1)
level_0['Total (%)'] = level_0['Total'] / level_0['Total'].sum() * 100
# getting the labels
labels = level_0.index
labels
# Get all rows, just for the first column, and transpose it (for the radar factory)
level_0.iloc[:12, :1].T
# take only the values
first_interface = level_0.iloc[:12, :1].T.values
build_single_radar(labels, values=first_interface, title=level_0.columns[0], grid=[2, 4, 6, 8], figure_title='Anno')
# take the data: all rows and the first 23 columns (selecting precisely in case Total columns are added)
# NOTE(review): assumes the sheet holds exactly 23 interface columns - confirm.
all_interfaces_counts = level_0.iloc[:12, :23].T.values
# same with percentages: each interface column is rescaled to sum to 100.
# The rows are then put in the same order as `labels`; without the reindex
# they would stay in groupby order and be drawn against the wrong spokes.
level_0_percent = (
    base.groupby(level=0).sum()
    .apply(lambda col: 100 * col / float(col.sum()))
    .reindex(labels)
)
level_0_percent.head()
# checking we have 100 everywhere
level_0_percent.sum()
level_0_percent.max().max()
all_interfaces_percents = level_0_percent.iloc[:12, :23].T.values
build_multiple_radar(labels, all_interfaces_percents, level_0.columns[:23], [10, 20, 30], 'all-interfaces-single')
# take only the Total per feature family, in percent. Selected by name so the
# result does not depend on how many interface columns precede it.
values = level_0[['Total (%)']].T.values
values
build_single_radar(labels, values, "All interfaces", grid=[5, 10, 15, 20], figure_title='all-interfaces-global')
# Same breakdown inside the "search" family: one row per search feature.
search = base.loc['search'].copy()
search.loc[:, 'Total'] = search.sum(axis=1)
search['Total (%)'] = search['Total'] / search['Total'].sum() * 100
search.head()
labels_search = search.index
labels_search
# Again pick the percentage column by name rather than by position.
values_search = search[['Total (%)']].T.values
values_search
build_single_radar(labels_search, values_search, title="Search", grid=[2, 4, 6, 8, 10, 12, 14], figure_title="search")